clear all
set more off

global path "~/shared_space/dbaharc/BHOR-YugoslavianRefugees/"
qui do "${path}do_files/InitializeGlobals.do"

/*
//Use Maddison dataset for GDP and population
use ~/shared_space/dbaharc/General_Data/Madisson/mpd2018.dta , clear
keep countrycode cgdppc rgdpnapc pop year
ren countrycode iso3
tempfile wdigdp
save `wdigdp'


use "${path}data/data_mig.dta", clear
ren nation iso3
replace iso3 = "YUG" if iso3=="FYUG"
keep iso3 sitc2 year mig_tot
gcollapse (sum) totmig95=mig_tot, by(year iso3)
keep if year==1995
drop year
tempfile totmig95
save `totmig95'

use "${path}data/GermanYUGMigration.dta", replace
qui do "${path}do_files/InitializeVars.do"

merge m:1 iso3 year using `wdigdp', keep(master matched) nogen
merge m:1 iso3 using `totmig95', keep(master matched) nogen

//Keep countries with few migrants in Germany
distinct iso3 if totmig95<1000
g tokeep1000 = totmig95<1000 | iso3=="YUG"

keep if year>1985 & year<=2010
encode iso3, g(ccode)

//Compute size of export basket
egen totalexp = total(exp), by(iso3 year)
g asinhtotexp = asinh(totalexp)

//Drop countries without GDP info
g temp = mi(rgdpnapc)
egen drop = total(temp), by(iso3)
//tab iso3 if drop
drop if drop>0
drop temp


//Keep only a balanced panel taking into account country size
egen ct = count(exppc_xdeu), by(iso3)
qui su ct
local m = r(max)
keep if ct==`m'
drop ct

replace dens = . if year<=1985 //Dens is an average of 3 years so it doesnt exist before 1986

distinct iso3

g product2d = floor(product/100)
g product1d = floor(product/1000)

preserve
gcollapse (sum) exp (mean) totalexp, by(iso3 year product2d)
g exp2dshare = exp/totalexp
tempfile 2dig
save `2dig'
restore

preserve
gcollapse (sum) exp (mean) totalexp, by(iso3 year product1d)
g exp1dshare = exp/totalexp
tempfile 1dig
save `1dig'
restore

//Create one digit shares and have them in the same row for all 1-digit sectors
preserve
gcollapse (sum) exp (mean) totalexp, by(iso3 year product1d)
g exp1dshare = exp/totalexp
keep iso3 year product1d exp1dshare
reshape wide exp1dshare, i(iso3 year) j(product1d)
tempfile 1digwide
save `1digwide'
restore


merge m:1 iso3 year product1d using `1dig', keep(master matched) nogen keepusing(exp1dshare)
merge m:1 iso3 year product2d using `2dig', keep(master matched) nogen keepusing(exp2dshare)
merge m:1 iso3 year using `1digwide', keep(master matched) nogen

/// CREATE THE DATABASE FOR ROTEM to create the synthetic controls////
distinct iso3 if tokeep1000
distinct iso3
outsheet iso3 product year countryid productid exp exp_xdeu asinhexp exppc exppc_xdeu asinhexppc expshare totalexp asinhtotexp exp1dshare* exp2dshare dens pop cgdppc rgdpnapc treat2000 if tokeep1000 using "${path}data/synth/InputForSynthetic.csv", comma replace name
outsheet iso3 product year countryid productid exp exp_xdeu asinhexp exppc exppc_xdeu asinhexppc expshare totalexp asinhtotexp exp1dshare* exp2dshare dens pop cgdppc rgdpnapc treat2000 using "${path}data/synth/InputForSyntheticAll.csv", comma replace name
*/

/*** SETTING UP THE DATASET ***/
use "${path}data/GermanYUGMigration.dta", replace
keep if iso3=="YUG"
qui do"${path}do_files/InitializeVars.do"

preserve
//insheet using "${path}data/synth/df_total.exp.csv", clear
insheet using "${path}data/synth/df_total.ALL.csv", clear //USING FILE WITH ALL 100+_COUNTRIES
keep product wweight year
ren wweight synthexp_xdeu
tempfile synth
save `synth'
restore

merge 1:1 product year using `synth', keep(matched) nogen

//Marking those products for which their sum squared errors are above 95th percentile and below 5th percentile for robustness
preserve
g diff = exp_xdeu - synthexp_xdeu if year<=1995
g diff_sq = diff^2
gcollapse (sum) diff_sq, by(product)
su diff_sq, d
g outlier_hi = diff_sq>r(p90)
g outlier_lo = diff_sq<r(p10)
g outlier =  outlier_hi | outlier_lo
keep product outlier
tempfile outlier
save `outlier'
restore
merge m:1 product using `outlier', keep(master matched) nogen


/*** Create graphs with all products, and those with high and low treatment ***/
g diff = (exp_xdeu - synthexp_xdeu)/1000000

preserve
drop if outlier
xtile treat_qt = treat2000, n(4)

gcollapse (mean) diff (sd) sddiff=diff, by(year treat_qt)
keep if inlist(treat_qt,1, 4)
ren treat_qt product
g asinhdiff = asinh(diff)
keep diff product year asinhdiff
tempfile meandiff
save `meandiff'
restore

append using `meandiff'

levelsof product if !outlier & !inlist(product,1,4), local(productlist)
local numberproducts = r(r)
local graphcommand ""
foreach p of local productlist {
    local graphcommand "`graphcommand' (line diff year if product==`p', lc(gs10)) "
}

twoway `graphcommand' ///
        (function y=0, range(year) lp(dash) lc(red)), xline(1995, lp(dot)) scheme(s1mono) ///
        ytitle("Diff. Exports Real - Synth" "(US $ millions)") xtitle(Year) xlabel(1986 1990(5)2010)  ///
        legend(order(1 "All Products")) name(allproducts)
graph export "${path}graphs/DiffSynthReal_Slides1.eps", replace as(eps)

twoway (line diff year if product==1, lc(gs5) lp(longdash))  ///
       (line diff year if product==4, lc(gs0) lw(thick))  ///
       (function y=0, range(year) lp(dash) lc(red)) ///
       if year>1985 & year<=2010,  ///
       scheme(s1mono) legend(order(1 "Avg. low treatment" 2 "Avg. high treatment") rows(1)) ///
       ytitle("Diff. Exports Real - Synth" "(US $ millions)") xtitle(Year) xlab(1986 1990(5)2010) xline(1995, lp(dot) lc(red)) ///
       name(average)
graph export "${path}graphs/DiffSynthReal_Slides2.eps", replace as(eps)

graph combine allproducts average, rows(2) ysize(8) xsize(6) scheme(s1color)
graph export "${path}graphs/DiffSynthReal.eps", as(eps) replace

//B&W
graph combine allproducts average, rows(2) ysize(8) xsize(6) scheme(s1mono)
graph export "${path}graphs/DiffSynthReal_BW.eps", as(eps) replace

/*
preserve
keep if !outlier
//bysort year: center treat2000, s
//g treat_am = c_treat2000>0
replace exp_xdeu = exp_xdeu/1000000
replace synthexp_xdeu = synthexp_xdeu/1000000

gcollapse (mean) diff (sum) exp_xdeu synthexp_xdeu (sd) sdsynthexp=synthexp_xdeu sddiff=diff, by(year treat_qt)
g ulsynth = synthexp_xdeu+1.96*sdsynthexp
g llsynth = synthexp_xdeu-1.96*sdsynthexp
g uldiff = diff+1.96*sddiff
g lldiff = diff-1.96*sddiff

twoway (line exp_xdeu year if treat_qt==1, lc(gs5))  ///
       (line synthexp_xdeu year if treat_qt==1, lp(dash) lc(gs5)) ///
       (line exp_xdeu year if treat_qt==4, lc(gs10) lw(thick))  ///
       (line synthexp_xdeu year if treat_qt==4, lp(dash) lc(gs10) lw(thick)) ///
       if year>1985 & year<=2010,  ///
       scheme(s1color) legend(order(1 "1st quartile" 2 "1st quartile (synth)" 3 "4th quartile" 4 "4th quartile (synth)") rows(2)) ///
       ytitle("Yearly exports USD, millions") xtitle(Year) xlab(1986(1)2010, labsize(small)) xline(1995, lp(dot) lc(red)) 

twoway (line diff year if treat_qt==1, lc(gs5))  ///
       (line diff year if treat_qt==4, lc(gs10) lw(thick))  ///
       (function y=0, range(year) lp(dash) lw(medthick) lc(red)) ///
       if year>1985 & year<=2010,  ///
       scheme(s1color) legend(order(1 "1st quartile" 2 "4th quartile") rows(1)) ///
       ytitle("Avg. Difference Exports Real - Synthetic (US$ millions)") xtitle(Year) xlab(1986 1990(5)2010) xline(1995, lp(dot) lc(red)) 
graph export "${path}graphs/TreatExportsSynth_yby.eps", as(eps) replace
restore
*/

/*** Table (Triple difference) ***/
ren (exp_xdeu synthexp_xdeu) (exp_xdeu0 exp_xdeu1)
drop exp_xdeu1985  exp_xdeu1990  exp_xdeu1995  exp_xdeu2000  exp_xdeu2005  exp_xdeu2010
reshape long exp_xdeu, j(synth) i(product year)
label var synth "synth"

preserve
egen temp1 = total($expdv) if year>=1988 & year<=1990, by(product synth)
egen temp2 = total($expdv) if year>=2005 & year<=2007, by(product synth)
g cumexp = temp1/3 if year==1990
replace cumexp = temp2/3 if year==2005
drop temp1 temp2
replace lnexp = log(cumexp)
replace lnexpplus1 = log(cumexp+1)
replace asinhexp = asinh(cumexp)
foreach v in lnexp lnexpplus1 asinhexp {
    eststo, title(`v'): qui reghdfe `v' c.treat2000#c.$after c.treat2000#c.$after#c.synth if $didsample, a(product synth year) cluster(product) old
}
foreach v in lnexp lnexpplus1 asinhexp {
    eststo, title(`v'): qui reghdfe `v' c.treat2000#c.$after c.treat2000#c.$after#c.synth if !outlier & $didsample, a(product synth year) cluster(product) old
}
estout, $estout_params_txt mgroups("All" "No Outliers", $estout_mgroups_ops) 
estout using "${path}tables/DIDSynthControls.tex", replace $estout_params $estout_pre $estout_post mgroups("All" "No Outliers", $estout_mgroups_ops) 
eststo clear
restore

/*
preserve
egen temp1 = total($expdv) if year>=1988 & year<=1990, by(product synth)
egen temp2 = total($expdv) if year>=2005 & year<=2007, by(product synth)
g cumexp = temp1/3 if year==1990
replace cumexp = temp2/3 if year==2005
drop temp1 temp2
replace lnexp = log(cumexp)
replace lnexpplus1 = log(cumexp+1)
replace asinhexp = asinh(cumexp)
foreach v in lnexp lnexpplus1 asinhexp {
    eststo, title(`v'): qui reghdfe `v' c.treat2000#c.$after if !synth & $didsample, a(product synth year) cluster(product) old
}
foreach v in lnexp lnexpplus1 asinhexp {
    eststo, title(`v'): qui reghdfe `v' c.treat2000#c.$after if synth & $didsample, a(product synth year) cluster(product) old
}
estout, $estout_params_txt mgroups("Real" "Synth", $estout_mgroups_ops) 
//estout using "${path}tables/DIDSynthControls.tex", replace $estout_params $estout_pre $estout_post mgroups("All" "No Outliers", $estout_mgroups_ops) 
eststo clear
restore
*/

/*Graph comparing difference in estimation for real vs synthetic*/
cap drop treat_qt
local qtiles 4
local missingqt 2
xtile treat_qt = treat2000, n(`qtiles')
levelsof treat_qt, local(qt_list_comp)
local qt_list = subinstr("`qt_list_comp'","`missingqt'","",1)
di "`qt_list'"

preserve
label var treat_qt ""
egen temp1 = total($expdv) if year>=1988 & year<=1990, by(product synth)
egen temp2 = total($expdv) if year>=2005 & year<=2007, by(product synth)
g cumexp = temp1/3 if year==1990
replace cumexp = temp2/3 if year==2005
drop temp1 temp2
replace lnexp = log(cumexp)
replace lnexpplus1 = log(cumexp+1)
replace asinhexp = asinh(cumexp)
foreach v in lnexp lnexpplus1 asinhexp {
    matrix R`v' = J(1,4,.)
    eststo, title(`v'): qui reghdfe `v' i(`qt_list').treat_qt#c.$after i.treat_qt#c.$after#c.synth if  $didsample, a(product year synth) cluster(product) old
    forvalues i=1/`qtiles' {
        if `i'==`missingqt' {
            qui lincom 0 - _b[`i'.treat_qt#c.$after#c.synth]
        }
        else {
            qui lincom _b[`i'.treat_qt#c.$after]- _b[`i'.treat_qt#c.$after#c.synth]
        }
        matrix R`v' = R`v' \ [`i', r(estimate),r(se),1] 
    }
    eststo, title(`v'): qui reghdfe `v' i(`qt_list').treat_qt#c.$after i.treat_qt#c.$after#c.synth if  $didsample & !outlier, a(product year synth) cluster(product) old
    forvalues i=1/`qtiles' {
        if `i'==`missingqt' {
            qui lincom 0 - _b[`i'.treat_qt#c.$after#c.synth]
        }
        else {
            qui lincom _b[`i'.treat_qt#c.$after]- _b[`i'.treat_qt#c.$after#c.synth]
        }
        matrix R`v' = R`v' \ [`i', r(estimate),r(se),0] 
    }
    
}
estout, $estout_params_IV_txt mgroups("All" "No Outliers", $estout_mgroups_ops) 
eststo clear

foreach j in 0 1 {
    foreach v in lnexp lnexpplus1 asinhexp {
        clear
        svmat R`v'
        drop if _n==1
        ren (R`v'1 R`v'2 R`v'3 R`v'4) (qt beta sd outlier)
        g ul = beta+1.645*sd
        g ll = beta-1.645*sd
        twoway (scatter beta qt) (rcap ul ll qt, lc(gs10)) if outlier==`j', name(`v', replace) yline(0, lp(dash)) scheme(s1color) xtitle("Treatment Intensity") ///
            ytitle("E({&beta}{sup:Real}-{&beta}{sup:Synth}) (95% CI)") legend(off) title("`v'") xtick(`qt_list_comp') xlabel(`qt_list_comp')
        if "`v'" == "asinhexp" {
            graph export "${path}graphs/BetaRealSynthOutliers`j'_`v'.eps", as(eps) replace
        }
    }
}
restore

